# -*- coding: utf-8 -*-
"""Table1,2_figure1,5,6.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1I8BDfZyHFXoHNZi8YGoZoHUHHxG-hApD

## The next block of code loads the data and calculates $\Pr\{A\}$
"""

import numpy as np
import pandas as pd

# Load the population totals; row 0 of each column is read as that group's
# head count (presumably the file holds a single summary row -- confirm).
df = pd.read_csv('totals.csv')
Num_black = float(df.loc[0, 'Black'])
Num_white = df.loc[0, 'Non- Hispanic white']
print('The number of black people:{} \nThe number of white people:{}'.format(Num_black,Num_white))

# Group priors: A=0 is the white group, A=1 is the black group.
pA0 = float(Num_white / (Num_white + Num_black))  # \Pr\{A=0\}
pA1 = 1 - pA0  # \Pr\{A=1\}

"""## Next block of code is generating figure 1."""

import matplotlib
import matplotlib.pyplot as plt

# 'normal' is not a font family name; matplotlib cannot resolve it and logs a
# "findfont: Font family 'normal' not found" warning before falling back to
# its default font.  Request the generic sans-serif family explicitly instead.
font = {'family' : 'sans-serif',
        'size'   : 11}
matplotlib.rc('font', **font)

# Per-group score CDFs; the CSV columns are percentages, hence the /100.
df_cdf = pd.read_csv('transrisk_cdf_by_race_ssa.csv')
Score = np.array(df_cdf['Score'])
cdf_white = np.array(df_cdf['Non- Hispanic white'])/100
cdf_black = np.array(df_cdf['Black'])/100
# Probability mass between consecutive scores: pdf[k] = cdf[k+1] - cdf[k],
# so each pdf array is one element shorter than Score.
pdf_white = np.diff(cdf_white)
pdf_black = np.diff(cdf_black)

# Per-score performance, computed as 1 - column/100.
# NOTE(review): presumably the CSV reports the non-performing percentage, so
# per_* is the success probability at each score -- confirm against the data.
df_per = pd.read_csv('transrisk_performance_by_race_ssa.csv')
per_white = 1-np.array(df_per['Non- Hispanic white'])/100  # white is group 0
per_black = 1-np.array(df_per['Black'])/100  # black is group 1

# Population-wide score CDF (mixture of the two groups).
cdf_R = pA1 * cdf_black + cdf_white * pA0

# Score distribution restricted to Y=1 within each group: weight the mass of
# each score bin by its performance and renormalise.
joint_white = pdf_white * per_white[1:]
joint_black = pdf_black * per_black[1:]
pRgivenYA0 = joint_white / np.sum(joint_white)
pRgivenYA1 = joint_black / np.sum(joint_black)

# Running sums give the conditional CDFs in a single pass.
CDFRgivenYA0 = np.cumsum(pRgivenYA0)
CDFRgivenYA1 = np.cumsum(pRgivenYA1)

plt.figure(figsize=(5,4))

# The x axis is an evenly spaced 0..100 grid with one point per CDF entry.
score_axis = np.linspace(0,100,len(CDFRgivenYA0))
plt.plot(score_axis,CDFRgivenYA0, label=r'$F_{R|Y=1,A=0}(\rho)$',linewidth=4)
plt.plot(score_axis,CDFRgivenYA1,'r' ,label=r'$F_{R|Y=1,A=1}(\rho)$',linewidth=4)
plt.legend(loc=0)
plt.xlabel(r'Score $ (\rho) $')
plt.grid()
plt.savefig('CDF.eps', format='eps',bbox_inches='tight')

"""#Next, we calculate optimal thresholds for the ES fairness notions

"""

def ES_fairness(gamma = 0.001, psi = 1, n0 = 100):
    """Grid-search the threshold pair with the best objective under Equal Selection.

    Every index pair ``(i, j)`` with ``i, j < len(Score) - 1`` is tried, where
    ``Score[i]`` is the threshold for group 0 and ``Score[j]`` for group 1.
    For a pair::

        noSelect    = cdf_white[i] * pA0 + cdf_black[j] * pA1
        numerator0  = pA0 * sum(pdf_white[i:] * per_white[i:-1])
        numerator1  = pA1 * sum(pdf_black[j:] * per_black[j:-1])
        objective   = (numerator0 + numerator1) / (1 - noSelect)

    A pair is feasible when
    ``|numerator0 - numerator1| <= gamma * (1 - noSelect)`` and
    ``noSelect ** n0 <= psi``.  The feasible pair with the largest strictly
    positive objective is reported; on ties the first pair met in i-major
    order is kept.  Pairs with ``1 - noSelect <= 0`` are skipped because the
    objective is undefined there.

    Reads the module-level ``Score``, ``cdf_*``, ``pdf_*``, ``per_*`` arrays
    and the priors ``pA0`` / ``pA1``.

    NOTE(review): the figure-1 code pairs ``pdf`` with ``per[1:]`` while this
    search pairs ``pdf[k:]`` with ``per[k:-1]``; confirm which alignment is
    intended before changing it, since it affects the reported tables.

    Args:
        gamma: Tolerance of the equal-selection constraint.
        psi: Upper bound on ``noSelect ** n0``.
        n0: Exponent applied to ``noSelect``; defaults to the value that was
            previously hard-coded.

    Returns:
        None.  The result (or a notice that no pair is feasible) is printed.
    """
    num_thresholds = len(Score) - 1

    # Each tail sum depends on one index only, so compute it once per index
    # instead of once per (i, j) pair.
    succ0 = [pA0 * float(np.sum(pdf_white[i:] * per_white[i:-1]))
             for i in range(num_thresholds)]
    succ1 = [pA1 * float(np.sum(pdf_black[j:] * per_black[j:-1]))
             for j in range(num_thresholds)]

    obj = 0
    best = None
    for i in range(num_thresholds):
        numerator0 = succ0[i]
        for j in range(num_thresholds):
            numerator1 = succ1[j]
            noSelect = cdf_white[i] * pA0 + cdf_black[j] * pA1
            selected = float(1 - noSelect)
            if selected <= 0:
                # Nobody is selected: the ratio below would divide by zero.
                continue
            obj_temp = float(numerator0 + numerator1) / selected
            is_fair = np.abs(numerator0 - numerator1) <= gamma * selected
            if is_fair and obj_temp > obj and noSelect**n0 <= psi:
                obj = obj_temp
                best = (float(Score[i]), float(Score[j]),
                        float(numerator0) / selected, float(numerator1) / selected)

    print('==Results for Equal Selection: gamma = {} psi = {}'.format(gamma,psi))
    if best is None:
        # Previously this path crashed with UnboundLocalError.
        print('No feasible threshold pair found.')
        return
    tau_0, tau_1, SR0, SR1 = best
    print('Threshold0: {}, Threshold1: {}, Accuracy: {}'.format(tau_0,tau_1,obj))
    print('Pr(E_0,tildeY=1): {}, Pr(E_1,tildeY=1): {}'.format(SR0,SR1))

# Run the Equal Selection search and print its result.
# Change gamma and psi to generate the results in Table 1 and Table 2.
ES_fairness(gamma = 0.01, psi = 1 )

"""# Next, we calculate optimal thresholds for the equal opportunity fairness notion

"""

def EO_fairness(gamma = 0.001,  psi = 1, n0 = 100):
    """Grid-search the threshold pair with the best objective under Equal Opportunity.

    Every index pair ``(i, j)`` with ``i, j < len(Score) - 1`` is tried, where
    ``Score[i]`` is the threshold for group 0 and ``Score[j]`` for group 1.
    For a pair::

        noSelect    = cdf_white[i] * pA0 + cdf_black[j] * pA1
        numerator0  = pA0 * sum(pdf_white[i:] * per_white[i:-1])
        numerator1  = pA1 * sum(pdf_black[j:] * per_black[j:-1])
        objective   = (numerator0 + numerator1) / (1 - noSelect)
        EqualOp0    = sum(pdf_white[i:] * per_white[i:-1]) / sum(pdf_white * per_white[:-1])
        EqualOp1    = sum(pdf_black[j:] * per_black[j:-1]) / sum(pdf_black * per_black[:-1])

    A pair is feasible when ``|EqualOp0 - EqualOp1| < gamma`` and
    ``noSelect ** n0 <= psi``.  The feasible pair with the largest strictly
    positive objective is reported; on ties the first pair met in i-major
    order is kept.  Pairs with ``1 - noSelect <= 0`` are skipped because the
    objective is undefined there.

    Reads the module-level ``Score``, ``cdf_*``, ``pdf_*``, ``per_*`` arrays
    and the priors ``pA0`` / ``pA1``.

    NOTE(review): the figure-1 code pairs ``pdf`` with ``per[1:]`` while this
    search pairs ``pdf[k:]`` with ``per[k:-1]``; confirm which alignment is
    intended before changing it, since it affects the reported tables.

    Args:
        gamma: Tolerance of the equal-opportunity constraint (strict bound).
        psi: Upper bound on ``noSelect ** n0``.
        n0: Exponent applied to ``noSelect``; defaults to the value that was
            previously hard-coded.

    Returns:
        None.  The result (or a notice that no pair is feasible) is printed.
    """
    num_thresholds = len(Score) - 1

    # Everything below depends on a single index, so it is computed once per
    # index rather than once per (i, j) pair.
    tail0 = [np.sum(pdf_white[i:] * per_white[i:-1]) for i in range(num_thresholds)]
    tail1 = [np.sum(pdf_black[j:] * per_black[j:-1]) for j in range(num_thresholds)]
    total0 = np.sum(pdf_white * per_white[:-1])
    total1 = np.sum(pdf_black * per_black[:-1])
    # Both groups divide the summed tail by the total (the original divided
    # element-wise before summing for group 1 only).
    equal_op0 = [t / total0 for t in tail0]
    equal_op1 = [t / total1 for t in tail1]

    obj = 0
    best = None
    for i in range(num_thresholds):
        numerator0 = pA0 * tail0[i]
        for j in range(num_thresholds):
            numerator1 = pA1 * tail1[j]
            noSelect = cdf_white[i] * pA0 + cdf_black[j] * pA1
            selected = 1 - noSelect
            if selected <= 0:
                # Nobody is selected: the ratio below would divide by zero.
                continue
            obj_temp = float(numerator0 + numerator1) / float(selected)
            is_fair = np.abs(equal_op0[i] - equal_op1[j]) < gamma
            if is_fair and obj_temp > obj and noSelect**n0 <= psi:
                obj = obj_temp
                best = (Score[i], Score[j], numerator0 / selected, numerator1 / selected)

    print('==Results for Equal Opportunity: gamma = {} psi = {}'.format(gamma,psi))
    if best is None:
        # Previously this path crashed with UnboundLocalError.
        print('No feasible threshold pair found.')
        return
    tau_0, tau_1, SR0, SR1 = best
    print('Threshold0: {}, Threshold1: {}, Accuracy: {}'.format(tau_0,tau_1,obj))
    print('Pr(E_0,tildeY=1): {}, Pr(E_1,tildeY=1): {}'.format(SR0,SR1))

# Run the Equal Opportunity search and print its result.
# Change gamma and psi to generate the results in Table 1 and Table 2.
EO_fairness(gamma = 0.001,  psi = 0.5)

"""Next, we calculate optimal thresholds for the statistical parity fairness notion"""

def SP_fairness(gamma = 0.001,  psi = 1, n0 = 100):
    """Grid-search the threshold pair with the best objective under Statistical Parity.

    Every index pair ``(i, j)`` with ``i, j < len(Score) - 1`` is tried, where
    ``Score[i]`` is the threshold for group 0 and ``Score[j]`` for group 1.
    For a pair::

        noSelect    = cdf_white[i] * pA0 + cdf_black[j] * pA1
        numerator0  = pA0 * sum(pdf_white[i:] * per_white[i:-1])
        numerator1  = pA1 * sum(pdf_black[j:] * per_black[j:-1])
        objective   = (numerator0 + numerator1) / (1 - noSelect)
        rate0       = sum(pdf_white[i:]) / sum(pdf_white)
        rate1       = sum(pdf_black[j:]) / sum(pdf_black)

    A pair is feasible when ``|rate0 - rate1| < gamma`` and
    ``noSelect ** n0 <= psi``.  The feasible pair with the largest strictly
    positive objective is reported; on ties the first pair met in i-major
    order is kept.  Pairs with ``1 - noSelect <= 0`` are skipped because the
    objective is undefined there.

    Reads the module-level ``Score``, ``cdf_*``, ``pdf_*``, ``per_*`` arrays
    and the priors ``pA0`` / ``pA1``.

    NOTE(review): the figure-1 code pairs ``pdf`` with ``per[1:]`` while this
    search pairs ``pdf[k:]`` with ``per[k:-1]``; confirm which alignment is
    intended before changing it, since it affects the reported tables.

    Args:
        gamma: Tolerance of the statistical-parity constraint (strict bound).
        psi: Upper bound on ``noSelect ** n0``.
        n0: Exponent applied to ``noSelect``; defaults to the value that was
            previously hard-coded.

    Returns:
        None.  The result (or a notice that no pair is feasible) is printed.
    """
    num_thresholds = len(Score) - 1

    # Everything below depends on a single index, so it is computed once per
    # index rather than once per (i, j) pair.
    succ0 = [pA0 * np.sum(pdf_white[i:] * per_white[i:-1]) for i in range(num_thresholds)]
    succ1 = [pA1 * np.sum(pdf_black[j:] * per_black[j:-1]) for j in range(num_thresholds)]
    mass0 = np.sum(pdf_white)
    mass1 = np.sum(pdf_black)
    # Both groups divide the summed tail by the total (the original divided
    # element-wise before summing for group 1 only).
    rate0 = [np.sum(pdf_white[i:]) / mass0 for i in range(num_thresholds)]
    rate1 = [np.sum(pdf_black[j:]) / mass1 for j in range(num_thresholds)]

    obj = 0
    best = None
    for i in range(num_thresholds):
        numerator0 = succ0[i]
        for j in range(num_thresholds):
            numerator1 = succ1[j]
            noSelect = cdf_white[i] * pA0 + cdf_black[j] * pA1
            selected = 1 - noSelect
            if selected <= 0:
                # Nobody is selected: the ratio below would divide by zero.
                continue
            obj_temp = float(numerator0 + numerator1) / float(selected)
            is_fair = np.abs(rate0[i] - rate1[j]) < gamma
            if is_fair and obj_temp > obj and noSelect**n0 <= psi:
                obj = obj_temp
                best = (Score[i], Score[j], numerator0 / selected, numerator1 / selected)

    print('==Results for Statistical Parity: gamma = {} psi = {}'.format(gamma,psi))
    if best is None:
        # Previously this path crashed with UnboundLocalError.
        print('No feasible threshold pair found.')
        return
    tau_0, tau_1, SR0, SR1 = best
    print('Threshold0: {}, Threshold1: {}, Accuracy: {}'.format(tau_0,tau_1,obj))
    print('Pr(E_0,tildeY=1): {}, Pr(E_1,tildeY=1): {}'.format(SR0,SR1))
# Run the Statistical Parity search and print its result.
# Change gamma and psi to generate the results in Table 1 and Table 2.
SP_fairness(gamma = 0.001,  psi =1)

"""# The next two blocks generate the figures in Section A.6



"""

# Sweep the bound psi on noSelect**n0 and record, for each value, the best
# objective and per-group terms under Equal Opportunity (EO) and Equal
# Selection (ES), both with a fairness tolerance of 0.01.
prs = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
# NOTE(review): the threshold-search functions above use n0 = 100; confirm
# that 50 is the intended value for this sweep.
n0 = 50

# --- psi-independent precomputation -------------------------------------
# None of the per-pair quantities depend on psi, so the grid is evaluated
# once and only the cheap psi filter is repeated for every entry of prs.
_num_thresholds = len(Score) - 1
_tail0 = [np.sum(pdf_white[i:] * per_white[i:-1]) for i in range(_num_thresholds)]
_tail1 = [np.sum(pdf_black[j:] * per_black[j:-1]) for j in range(_num_thresholds)]
_total0 = np.sum(pdf_white * per_white[:-1])
_total1 = np.sum(pdf_black * per_black[:-1])

# Candidate tuples, stored in i-major scan order:
#   (noSelect**n0, objective, SR0, SR1[, EqualOp0, EqualOp1])
_eo_candidates = []
_es_candidates = []
for i in range(_num_thresholds):
    numerator0 = pA0 * _tail0[i]
    EqualOp0 = _tail0[i] / _total0
    for j in range(_num_thresholds):
        numerator1 = pA1 * _tail1[j]
        EqualOp1 = _tail1[j] / _total1
        noSelect = cdf_white[i] * pA0 + cdf_black[j] * pA1
        selected = float(1 - noSelect)
        if selected <= 0:
            # Nobody is selected: the ratios below would divide by zero.
            continue
        candidate = (noSelect**n0,
                     float(numerator0 + numerator1) / selected,
                     numerator0 / selected,
                     numerator1 / selected)
        if np.abs(EqualOp0 - EqualOp1) < 0.01:
            _eo_candidates.append(candidate + (EqualOp0, EqualOp1))
        if np.abs(numerator0 - numerator1) <= 0.01 * selected:
            _es_candidates.append(candidate)


def _best_candidate(candidates, psi):
    """Return the first candidate with the largest positive objective and
    ``noSelect**n0 <= psi``, or None when no candidate qualifies."""
    best = None
    best_obj = 0
    for cand in candidates:
        if cand[1] > best_obj and cand[0] <= psi:
            best = cand
            best_obj = cand[1]
    return best


# --- sweep ----------------------------------------------------------------
obj_eq = []
Eq0 = []
Eq1 = []
SR0_eq = []
SR1_eq = []
obj_ESR = []
SR0_ESR = []
SR1_ESR = []
for p in prs:
    # Entries stay at 0 when no pair is feasible for this psi.
    eo_best = _best_candidate(_eo_candidates, p) or (0, 0, 0, 0, 0, 0)
    obj_eq.append(eo_best[1])
    SR0_eq.append(eo_best[2])
    SR1_eq.append(eo_best[3])
    Eq0.append(eo_best[4])
    Eq1.append(eo_best[5])

    es_best = _best_candidate(_es_candidates, p) or (0, 0, 0, 0)
    obj_ESR.append(es_best[1])
    SR0_ESR.append(es_best[2])
    SR1_ESR.append(es_best[3])

import matplotlib

# 'normal' is not a font family name; matplotlib cannot resolve it and logs a
# "findfont: Font family 'normal' not found" warning before falling back to
# its default font.  Request the generic sans-serif family explicitly instead.
font = {'family' : 'sans-serif',
        'size'   : 11}
matplotlib.rc('font', **font)

# First figure: best objective versus psi for EO and ES.
plt.figure(figsize=(5,4))
plt.plot(prs,obj_eq,'c--',label=r'$\Pr\{\tilde{Y}=1\}$ EO',linewidth=4)
plt.plot(prs,obj_ESR,'m:' ,label=r'$\Pr\{\tilde{Y}=1\}$ ES',linewidth=4)
plt.legend(loc='lower right')
plt.xlabel(r'$\psi$')
plt.grid()
plt.savefig('time21.eps', format='eps',bbox_inches='tight')

# Second figure: the per-group terms of the objective versus psi.
plt.figure(figsize=(5,4))
plt.plot(prs,SR0_ESR,'r',label=r'$\Pr\{\tilde{Y}=1,E_0\}$ ES',linewidth=4)
plt.plot(prs,SR1_ESR,'b:' ,label=r'$\Pr\{\tilde{Y}=1,E_1\}$ ES',linewidth=4)
plt.plot(prs,SR0_eq,'c',label=r'$\Pr\{\tilde{Y}=1,E_0\}$ EO',linewidth=4)
plt.plot(prs,SR1_eq,'m' ,label=r'$\Pr\{\tilde{Y}=1,E_1\}$ EO',linewidth=4)
# The legend is anchored manually, in two columns.
plt.legend(loc='lower right',ncol=2,bbox_to_anchor=(1.02, 0.15) )
plt.xlabel(r'$\psi$')
plt.grid()
plt.savefig('time2.eps', format='eps',bbox_inches='tight')